#! /bin/env python 
'''
Created on Oct 28, 2012

@author: Shunping Huang
'''

import csv
import gc
import argparse as ap
import gzip
import logging
from time import localtime,strftime

from modtools import vcfreader as vcf
from modtools.variants import parseVariant, SUB, INS, DEL
from modtools.utils import readableFile, writableFile, validChromList
from modtools import version
from modtools import metadata

# Program description handed to the argparse parser as its description text.
DESC = 'A VCF to MOD converter.'
# Version string of this script.
__version__ = '0.1.0'
# Default verbosity level.
# NOTE(review): not referenced anywhere in the visible code -- confirm whether
# it is still needed.
VERBOSITY = 1
# Shared logger; remains None until initLogger() binds it to the root logger.
logger = None


def initLogger():
    '''
    Configure the root logger and publish it as the module-level ``logger``.

    The root logger itself is set to DEBUG, and one new stream handler is
    attached to it that only emits records at INFO and above, formatted as
    "[timestamp] name: LEVEL: message".
    '''
    global logger

    # Build the console handler first, fully configured, then attach it.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(logging.Formatter(
        fmt="[%(asctime)s] %(name)-10s: %(levelname)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"))

    root = logging.getLogger()
    root.setLevel(logging.DEBUG)
    root.addHandler(console)

    logger = root


if __name__ == '__main__':
    # Command-line entry point: collect the variants of one sample from one
    # or more VCF files and write them, chromosome by chromosome, to a single
    # gzip-compressed MOD file.
    initLogger()
    p = ap.ArgumentParser(description=DESC,
                          formatter_class=ap.RawTextHelpFormatter)
    # Optional arguments
    group = p.add_mutually_exclusive_group()
    group.add_argument("-q", dest='quiet', action='store_true',
                       help='quiet mode')
    group.add_argument('-v', dest='verbosity', action="store_const", const=2,
                       default=1, help='verbose mode')
    p.add_argument('-c', metavar='chromList', dest='chroms',
                   type=validChromList, default=[],
                   help='a comma-separated list of chromosomes in output'
                        ' (default: all)')
    p.add_argument('-o', metavar='mod', dest='mod',
                   type=writableFile, default=None,
                   help='the output mod file'
                        ' (default: <sample>.mod)')
    # Required arguments
    p.add_argument('ref', help='reference name')
    p.add_argument('sample', help='requested sample name in VCF')
    p.add_argument('infiles', metavar='vcf', nargs='+',
                   type=readableFile, help='input VCF file(s)')

    args = p.parse_args()

    if args.quiet:
        logger.setLevel(logging.CRITICAL)
    elif args.verbosity == 2:
        logger.setLevel(logging.DEBUG)
        # Records are filtered by the handler as well as by the logger, and
        # the handler installed by initLogger() only passes INFO and above.
        # Lower it too, otherwise '-v' has no visible effect.
        for handler in logger.handlers:
            handler.setLevel(logging.DEBUG)

    chroms = args.chroms
    sample = args.sample
    ref = args.ref
    infiles = args.infiles
    modPath = args.mod if args.mod is not None else sample + '.mod'

    meta = metadata.MetaData(ref)
    chromAliases = meta.chromAliases

    logger.info("from %s to %s", ref, sample)
    logger.info("input VCF file(s): %s", ', '.join(infiles))
    logger.info("output MOD file: %s", modPath)

    # One reader per input file, each restricted to the requested sample.
    vcfs = [vcf.VCFReader(infile, [sample]) for infile in infiles]

    # Use all chromosomes found in any VCFs
    if not chroms:
        allChroms = set()
        for reader in vcfs:
            allChroms.update(reader.chroms)
        chroms = sorted(allChroms)

    # gzip.open() gives a binary stream by default, which only accepts the
    # str rows written below on Python 2 (where str is bytes).  On Python 3
    # open the archive in text mode; newline='' keeps the '\n' line
    # terminators exactly as written.
    if str is bytes:
        modfp = gzip.open(modPath, 'wb')
    else:
        modfp = gzip.open(modPath, 'wt', newline='')

    # Close the archive even when conversion fails, so the gzip stream is
    # always finalized instead of being left truncated.
    try:
        modfp.write("#version=%s\n" % version.__mod_version__)
        modfp.write("#date=%s\n" % strftime("%Y%m%d", localtime()))
        modfp.write("#reference=%s\n" % ref)
        modfp.write("#sample=%s\n" % sample)
        out = csv.writer(modfp, delimiter='\t', lineterminator='\n')

        for modChrom in chroms:  # for each chromosome
            # The collector is switched off while the per-chromosome pool is
            # built (presumably to avoid GC overhead from the many tuples
            # allocated) and switched back on once the pool is released.
            gc.disable()
            nSub = 0
            nIns = 0
            nDel = 0
            pool = []
            chrom = chromAliases.getBasicName(modChrom)
            aliases = chromAliases.getAliasNames(chrom)
            for reader in vcfs:  # for each VCF file
                isAliasFound = False

                for alias in aliases:  # for each alias
                    logger.info("try alias '%s' for chromosome '%s'",
                                alias, modChrom)
                    if alias not in reader.chroms:
                        logger.warning("chromosome alias '%s' not found in %s",
                                       alias, reader.fileName)
                        continue

                    isAliasFound = True
                    count = 0
                    logger.info("processing chromosome alias '%s' in %s",
                                alias, reader.fileName)
                    for tup in reader.fetch(alias):
                        v = parseVariant(modChrom, tup[1], tup[2], tup[3])
                        if v.type == SUB:
                            pool.append(('s', modChrom, v.start[1], v.extra))
                            nSub += 1
                        elif v.type == INS:
                            pool.append(('i', modChrom, v.start[1], v.extra))
                            nIns += v.length
                        elif v.type == DEL:
                            # Change non-atomic deletions to atomic
                            for j in range(v.length):
                                pool.append(('d', modChrom, v.start[1] + j,
                                             v.extra[j]))
                            nDel += v.length
                        else:
                            raise ValueError("Unknown variant type: %s" %
                                             v.type)
                        count += 1

                    logger.info("%d variant(s) found in %s",
                                count, reader.fileName)

                if not isAliasFound:
                    logger.warning("chromosome '%s' not found in %s",
                                   modChrom, reader.fileName)

            # Drop exact duplicates (the same record seen in several files),
            # then order by position.  Set iteration order is undefined, so
            # records sharing a position are further ordered by type and
            # payload to keep the output reproducible from run to run.
            pool = sorted(set(pool),
                          key=lambda rec: (rec[2], rec[0], str(rec[3])))
            out.writerows(pool)

            logger.info("%d line(s) written to MOD", len(pool))
            if pool:
                # Counters are tallied before de-duplication.
                logger.info("SNPs: %d base(s)", nSub)
                logger.info("Insertions: %d base(s)", nIns)
                logger.info("Deletions: %d base(s)", nDel)

            # Release this chromosome's records before starting the next.
            del pool
            gc.enable()
    finally:
        modfp.close()

    logger.info("All Done!")